diff --git a/VoiceInk/Models/PredefinedModels.swift b/VoiceInk/Models/PredefinedModels.swift index 78ca49a..dcc2da6 100644 --- a/VoiceInk/Models/PredefinedModels.swift +++ b/VoiceInk/Models/PredefinedModels.swift @@ -5,37 +5,60 @@ struct PredefinedModel: Identifiable, Hashable { let name: String let displayName: String let size: String - let language: String + let supportedLanguages: [String: String] let description: String let speed: Double let accuracy: Double let ramUsage: Double let hash: String - + var downloadURL: String { "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(filename)" } - + var filename: String { "\(name).bin" } - + func hash(into hasher: inout Hasher) { hasher.combine(id) } - + static func == (lhs: PredefinedModel, rhs: PredefinedModel) -> Bool { lhs.id == rhs.id } + + var isMultilingualModel: Bool { + supportedLanguages.count > 1 + } + + var language: String { + isMultilingualModel ? "Multilingual" : "English-only" + } } -struct PredefinedModels { +enum PredefinedModels { + static func getLanguageDictionary(isMultilingual: Bool, isLargeV3: Bool = false) -> [String: + String] + { + if !isMultilingual { + return ["en": "English"] + } else if isLargeV3 { + return allLanguages // Large v3 models support all languages including Cantonese + } else { + // Create a dictionary without Cantonese for non-Large v3 models + var languagesWithoutCantonese = allLanguages + languagesWithoutCantonese.removeValue(forKey: "yue") + return languagesWithoutCantonese + } + } + static let models: [PredefinedModel] = [ PredefinedModel( name: "ggml-tiny", displayName: "Tiny", size: "75 MiB", - language: "Multilingual", + supportedLanguages: getLanguageDictionary(isMultilingual: true), description: "Tiny model, fastest, least accurate, supports multiple languages", speed: 0.95, accuracy: 0.6, @@ -46,7 +69,7 @@ struct PredefinedModels { name: "ggml-tiny.en", displayName: "Tiny (English)", size: "75 MiB", - language: "English", + supportedLanguages: getLanguageDictionary(isMultilingual: false), description: "Tiny model optimized for English, fastest, least accurate", speed: 0.95, accuracy: 0.65, @@ -57,8 +80,9 @@ struct PredefinedModels { name: "ggml-base", displayName: "Base", size: "142 MiB", - language: "Multilingual", - description: "Base model, good balance of speed and accuracy, supports multiple languages", + supportedLanguages: getLanguageDictionary(isMultilingual: true), + description: + "Base model, good balance of speed and accuracy, supports multiple languages", speed: 0.8, accuracy: 0.75, ramUsage: 0.5, @@ -68,7 +92,7 @@ struct PredefinedModels { name: "ggml-base.en", displayName: "Base (English)", size: "142 MiB", - language: "English", + supportedLanguages: PredefinedModels.getLanguageDictionary(isMultilingual: false), description: "Base model optimized for English, good balance of speed and accuracy", speed: 0.8, accuracy: 0.8, @@ -79,8 +103,9 @@ struct PredefinedModels { name: "ggml-small", displayName: "Small", size: "466 MiB", - language: "Multilingual", - description: "Small model, slower but more accurate than base, supports multiple languages", + supportedLanguages: getLanguageDictionary(isMultilingual: true), + description: + "Small model, slower but more accurate than base, supports multiple languages", speed: 0.6, accuracy: 0.85, ramUsage: 0.7, @@ -90,7 +115,7 @@ struct PredefinedModels { name: "ggml-small.en", displayName: "Small (English)", size: "466 MiB", - language: "English", + supportedLanguages: PredefinedModels.getLanguageDictionary(isMultilingual: false), description: "Small model optimized for English, slower but more accurate than base", speed: 0.6, accuracy: 0.9, @@ -101,7 +126,7 @@ struct PredefinedModels { name: "ggml-medium", displayName: "Medium", size: "1.5 GiB", - language: "Multilingual", + supportedLanguages: getLanguageDictionary(isMultilingual: true), description: "Medium model, slow but very accurate, supports multiple languages", speed: 0.4, accuracy: 0.92, @@ -112,7 +137,7 @@ struct PredefinedModels { name: "ggml-medium.en", displayName: "Medium (English)", size: "1.5 GiB", - language: "English", + supportedLanguages: PredefinedModels.getLanguageDictionary(isMultilingual: false), description: "Medium model optimized for English, slow but very accurate", speed: 0.4, accuracy: 0.95, @@ -123,7 +148,7 @@ struct PredefinedModels { name: "ggml-large-v3", displayName: "Large v3", size: "2.9 GiB", - language: "Multilingual", + supportedLanguages: getLanguageDictionary(isMultilingual: true, isLargeV3: true), description: "Large model v3, very slow but most accurate, supports multiple languages", speed: 0.2, accuracy: 0.98, @@ -134,7 +159,7 @@ struct PredefinedModels { name: "ggml-large-v3-q5_0", displayName: "Large v3 (Quantized)", size: "1.1 GiB", - language: "Multilingual", + supportedLanguages: getLanguageDictionary(isMultilingual: true, isLargeV3: true), description: "Quantized version of Large v3, faster with slightly lower accuracy", speed: 0.3, accuracy: 0.97, @@ -145,8 +170,9 @@ struct PredefinedModels { name: "ggml-large-v3-turbo", displayName: "Large v3 Turbo", size: "1.5 GiB", - language: "Multilingual", - description: "Large model v3 Turbo, faster than v3 with similar accuracy, supports multiple languages", + supportedLanguages: getLanguageDictionary(isMultilingual: true, isLargeV3: true), + description: + "Large model v3 Turbo, faster than v3 with similar accuracy, supports multiple languages", speed: 0.5, accuracy: 0.97, ramUsage: 1.8, @@ -156,12 +182,117 @@ struct PredefinedModels { name: "ggml-large-v3-turbo-q5_0", displayName: "Large v3 Turbo (Quantized)", size: "547 MiB", - language: "Multilingual", + supportedLanguages: getLanguageDictionary(isMultilingual: true, isLargeV3: true), description: "Quantized version of Large v3 Turbo, faster with slightly lower accuracy", speed: 0.6, accuracy: 0.96, ramUsage: 1.0, hash: "e050f7970618a659205450ad97eb95a18d69c9ee" - ) + ), + ] + + static let allLanguages = [ + "auto": "Auto-detect", + "af": "Afrikaans", + "am": "Amharic", + "ar": "Arabic", + "as": "Assamese", + "az": "Azerbaijani", + "ba": "Bashkir", + "be": "Belarusian", + "bg": "Bulgarian", + "bn": "Bengali", + "bo": "Tibetan", + "br": "Breton", + "bs": "Bosnian", + "ca": "Catalan", + "cs": "Czech", + "cy": "Welsh", + "da": "Danish", + "de": "German", + "el": "Greek", + "en": "English", + "es": "Spanish", + "et": "Estonian", + "eu": "Basque", + "fa": "Persian", + "fi": "Finnish", + "fo": "Faroese", + "fr": "French", + "ga": "Irish", + "gl": "Galician", + "gu": "Gujarati", + "ha": "Hausa", + "he": "Hebrew", + "hi": "Hindi", + "hr": "Croatian", + "ht": "Haitian Creole", + "hu": "Hungarian", + "hy": "Armenian", + "id": "Indonesian", + "is": "Icelandic", + "it": "Italian", + "ja": "Japanese", + "jw": "Javanese", + "ka": "Georgian", + "kk": "Kazakh", + "km": "Khmer", + "kn": "Kannada", + "ko": "Korean", + "la": "Latin", + "lb": "Luxembourgish", + "ln": "Lingala", + "lo": "Lao", + "lt": "Lithuanian", + "lv": "Latvian", + "mg": "Malagasy", + "mi": "Maori", + "mk": "Macedonian", + "ml": "Malayalam", + "mn": "Mongolian", + "mr": "Marathi", + "ms": "Malay", + "mt": "Maltese", + "my": "Myanmar", + "ne": "Nepali", + "nl": "Dutch", + "nn": "Norwegian Nynorsk", + "no": "Norwegian", + "oc": "Occitan", + "pa": "Punjabi", + "pl": "Polish", + "ps": "Pashto", + "pt": "Portuguese", + "ro": "Romanian", + "ru": "Russian", + "sa": "Sanskrit", + "sd": "Sindhi", + "si": "Sinhala", + "sk": "Slovak", + "sl": "Slovenian", + "sn": "Shona", + "so": "Somali", + "sq": "Albanian", + "sr": "Serbian", + "su": "Sundanese", + "sv": "Swedish", + "sw": "Swahili", + "ta": "Tamil", + "te": "Telugu", + "tg": "Tajik", + "th": "Thai", + "tk": "Turkmen", + "tl": "Tagalog", + "tr": "Turkish", + "tt": "Tatar", + "ug": "Uyghur", + "uk": "Ukrainian", + "ur": "Urdu", + "uz": "Uzbek", + "vi": "Vietnamese", + "yi": "Yiddish", + "yo": "Yoruba", + "yue": "Cantonese", + "zh": "Chinese", ] } diff --git a/VoiceInk/Views/LanguageSelectionView.swift b/VoiceInk/Views/LanguageSelectionView.swift index 3ded536..17e191c 100644 --- a/VoiceInk/Views/LanguageSelectionView.swift +++ b/VoiceInk/Views/LanguageSelectionView.swift @@ -2,8 +2,8 @@ import SwiftUI // Define a display mode for flexible usage enum LanguageDisplayMode { - case full // For settings page with descriptions - case menuItem // For menu bar with compact layout + case full // For settings page with descriptions + case menuItem // For menu bar with compact layout } struct LanguageSelectionView: View { @@ -11,115 +11,11 @@ struct LanguageSelectionView: View { @AppStorage("SelectedLanguage") private var selectedLanguage: String = "en" // Add display mode parameter with full as the default var displayMode: LanguageDisplayMode = .full - - let languages = [ - "auto": "Auto-detect", - "af": "Afrikaans", - "am": "Amharic", - "ar": "Arabic", - "as": "Assamese", - "az": "Azerbaijani", - "ba": "Bashkir", - "be": "Belarusian", - "bg": "Bulgarian", - "bn": "Bengali", - "bo": "Tibetan", - "br": "Breton", - "bs": "Bosnian", - "ca": "Catalan", - "cs": "Czech", - "cy": "Welsh", - "da": "Danish", - "de": "German", - "el": "Greek", - "en": "English", - "es": "Spanish", - "et": "Estonian", - "eu": "Basque", - "fa": "Persian", - "fi": "Finnish", - "fo": "Faroese", - "fr": "French", - "ga": "Irish", - "gl": "Galician", - "gu": "Gujarati", - "ha": "Hausa", - "he": "Hebrew", - "hi": "Hindi", - "hr": "Croatian", - "ht": "Haitian Creole", - "hu": "Hungarian", - "hy": "Armenian", - "id": "Indonesian", - "is": "Icelandic", - "it": "Italian", - "ja": "Japanese", - "jw": "Javanese", - "ka": "Georgian", - "kk": "Kazakh", - "km": "Khmer", - "kn": "Kannada", - "ko": "Korean", - "la": "Latin", - "lb": "Luxembourgish", - "ln": "Lingala", - "lo": "Lao", - "lt": "Lithuanian", - "lv": "Latvian", - "mg": "Malagasy", - "mi": "Maori", - "mk": "Macedonian", - "ml": "Malayalam", - "mn": "Mongolian", - "mr": "Marathi", - "ms": "Malay", - "mt": "Maltese", - "my": "Myanmar", - "ne": "Nepali", - "nl": "Dutch", - "nn": "Norwegian Nynorsk", - "no": "Norwegian", - "oc": "Occitan", - "pa": "Punjabi", - "pl": "Polish", - "ps": "Pashto", - "pt": "Portuguese", - "ro": "Romanian", - "ru": "Russian", - "sa": "Sanskrit", - "sd": "Sindhi", - "si": "Sinhala", - "sk": "Slovak", - "sl": "Slovenian", - "sn": "Shona", - "so": "Somali", - "sq": "Albanian", - "sr": "Serbian", - "su": "Sundanese", - "sv": "Swedish", - "sw": "Swahili", - "ta": "Tamil", - "te": "Telugu", - "tg": "Tajik", - "th": "Thai", - "tk": "Turkmen", - "tl": "Tagalog", - "tr": "Turkish", - "tt": "Tatar", - "ug": "Uyghur", - "uk": "Ukrainian", - "ur": "Urdu", - "uz": "Uzbek", - "vi": "Vietnamese", - "yi": "Yiddish", - "yo": "Yoruba", - "zh": "Chinese" - ] - + private func updateLanguage(_ language: String) { // Update UI state - the UserDefaults updating is now automatic with @AppStorage selectedLanguage = language - + // Post notification for language change NotificationCenter.default.post(name: .languageDidChange, object: nil) } @@ -130,14 +26,26 @@ struct LanguageSelectionView: View { let predefinedModel = PredefinedModels.models.first(where: { $0.name == currentModel.name }) else { return false } - return predefinedModel.language == "Multilingual" + return predefinedModel.isMultilingualModel } - + + // Function to get current model's supported languages + private func getCurrentModelLanguages() -> [String: String] { + guard let currentModel = whisperState.currentModel, + let predefinedModel = PredefinedModels.models.first(where: { + $0.name == currentModel.name + }) + else { + return ["en": "English"] // Default to English if no model found + } + return predefinedModel.supportedLanguages + } + // Get the display name of the current language private func currentLanguageDisplayName() -> String { - return languages[selectedLanguage] ?? "Unknown" + return getCurrentModelLanguages()[selectedLanguage] ?? "Unknown" } - + var body: some View { switch displayMode { case .full: @@ -146,20 +54,26 @@ struct LanguageSelectionView: View { menuItemView } } - + // The original full view layout for settings page private var fullView: some View { VStack(alignment: .leading, spacing: 16) { Text("Transcription Language") .font(.headline) - + if let currentModel = whisperState.currentModel, - let predefinedModel = PredefinedModels.models.first(where: { $0.name == currentModel.name }) { - - if predefinedModel.language == "Multilingual" { + let predefinedModel = PredefinedModels.models.first(where: { + $0.name == currentModel.name + }) + { + if isMultilingualModel() { VStack(alignment: .leading, spacing: 8) { Picker("Select Language", selection: $selectedLanguage) { - ForEach(languages.sorted(by: { $0.value < $1.value }), id: \.key) { key, value in + ForEach( + predefinedModel.supportedLanguages.sorted(by: { + $0.value < $1.value + }), id: \.key + ) { key, value in Text(value).tag(key) } } @@ -167,14 +81,16 @@ struct LanguageSelectionView: View { .onChange(of: selectedLanguage) { newValue in updateLanguage(newValue) } - + Text("Current model: \(predefinedModel.displayName)") .font(.caption) .foregroundColor(.secondary) - - Text("This model supports multiple languages. You can choose auto-detect or select a specific language.") - .font(.caption) - .foregroundColor(.secondary) + + Text( + "This model supports multiple languages. You can choose auto-detect or select a specific language." + ) + .font(.caption) + .foregroundColor(.secondary) } } else { // For English-only models, force set language to English @@ -182,14 +98,16 @@ struct LanguageSelectionView: View { Text("Language: English") .font(.subheadline) .foregroundColor(.primary) - + Text("Current model: \(predefinedModel.displayName)") .font(.caption) .foregroundColor(.secondary) - - Text("This is an English-optimized model and only supports English transcription.") - .font(.caption) - .foregroundColor(.secondary) + + Text( + "This is an English-optimized model and only supports English transcription." + ) + .font(.caption) + .foregroundColor(.secondary) } .onAppear { // Ensure English is set when viewing English-only model @@ -207,13 +125,15 @@ struct LanguageSelectionView: View { .background(Color(NSColor.controlBackgroundColor)) .cornerRadius(10) } - + // New compact view for menu bar private var menuItemView: some View { Group { if isMultilingualModel() { Menu { - ForEach(languages.sorted(by: { $0.value < $1.value }), id: \.key) { key, value in + ForEach( + getCurrentModelLanguages().sorted(by: { $0.value < $1.value }), id: \.key + ) { key, value in Button { updateLanguage(key) } label: { diff --git a/VoiceInk/Whisper/WhisperPrompt.swift b/VoiceInk/Whisper/WhisperPrompt.swift index 4608697..e964080 100644 --- a/VoiceInk/Whisper/WhisperPrompt.swift +++ b/VoiceInk/Whisper/WhisperPrompt.swift @@ -24,6 +24,7 @@ class WhisperPrompt: ObservableObject { "zh": "你好,最近好吗?见到你很高兴。", "th": "สวัสดีครับ/ค่ะ, สบายดีไหม? ยินดีที่ได้พบคุณ", "vi": "Xin chào, bạn khỏe không? Rất vui được gặp bạn.", + "yue": "你好,最近點呀?見到你好開心。", // European Languages "es": "¡Hola, ¿cómo estás? Encantado de conocerte.", @@ -115,4 +116,4 @@ class WhisperPrompt: ObservableObject { updateTranscriptionPrompt() } } -} \ No newline at end of file +}