// NOTE(review): recovered from a whitespace-collapsed git diff. The same diff also
// touched `WhisperContext` in LibWhisper.swift (`params.no_context` flipped to `true`,
// the `suppress_blank` / `suppress_nst` overrides removed); those hunks are partial
// and are not reproduced here.

import Foundation
import os

/// Post-processes Whisper transcriptions by deleting spans that match a fixed set of
/// "format indicator" patterns (bracketed text, emphasis markers, time ranges).
///
/// The type is used purely as a namespace: all members are static.
///
/// - Important: The patterns are purely syntactic. Any text the speaker genuinely
///   dictated inside `()`, `[]`, `{}`, `<>`, `*…*` or `_…_` is removed as well.
struct WhisperHallucinationFilter {
    private static let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperHallucinationFilter")

    // Pattern-based approach for detecting hallucinations - focusing on format indicators.
    // All quantifiers are lazy so each match stops at the nearest closing delimiter.
    // `.` does not match line separators here, so a delimiter pair split across lines
    // is left untouched.
    private static let hallucinationPatterns = [
        // Text in various types of brackets - the most reliable hallucination indicators
        #"\[.*?\]"#,  // [Text in square brackets]
        #"\(.*?\)"#,  // (Text in parentheses)
        #"\{.*?\}"#,  // {Text in curly braces}
        #"<.*?>"#,    // <Text in angle brackets>

        // Text with special formatting
        #"\*.*?\*"#,  // *Text with asterisks*
        #"_.*?_"#,    // _Text with underscores_

        // Time indicators often added by Whisper
        #"(?i)\d{1,2}:\d{2}(:\d{2})?\s*-\s*\d{1,2}:\d{2}(:\d{2})?"#  // 00:00 - 00:00 format
    ]

    /// `hallucinationPatterns` compiled once, on first use, instead of on every call
    /// to `filter(_:)`. A pattern that fails to compile is logged and skipped rather
    /// than silently ignored.
    private static let hallucinationRegexes: [NSRegularExpression] = hallucinationPatterns.compactMap { pattern -> NSRegularExpression? in
        do {
            return try NSRegularExpression(pattern: pattern)
        } catch {
            logger.error("Invalid hallucination pattern \(pattern, privacy: .public): \(error.localizedDescription, privacy: .public)")
            return nil
        }
    }

    /// Removes hallucinations from transcription text using pattern matching.
    ///
    /// Patterns are applied in declaration order, each one to the output of the
    /// previous one. Afterwards every run of two or more whitespace characters
    /// (including newlines) is collapsed to a single space and the result is trimmed.
    ///
    /// - Parameter text: Original transcription text from Whisper.
    /// - Returns: Filtered text with hallucinations removed; may be empty when the
    ///   whole input matched.
    static func filter(_ text: String) -> String {
        logger.notice("🧹 Applying pattern-based hallucination filter to transcription")

        var filteredText = text

        // Remove pattern-based hallucinations. The range is recomputed on every pass
        // because the previous replacement may have shortened the string.
        for regex in hallucinationRegexes {
            let range = NSRange(filteredText.startIndex..., in: filteredText)
            filteredText = regex.stringByReplacingMatches(in: filteredText, options: [], range: range, withTemplate: "")
        }

        // Clean up extra whitespace and newlines that might be left after removing hallucinations
        filteredText = filteredText.replacingOccurrences(of: #"\s{2,}"#, with: " ", options: .regularExpression)
        filteredText = filteredText.trimmingCharacters(in: .whitespacesAndNewlines)

        // Log whether anything changed. Whitespace-only normalisation also counts as
        // a change here, since the comparison is against the untouched input.
        if filteredText != text {
            logger.notice("✅ Removed hallucinations using pattern matching")
        } else {
            logger.notice("✅ No hallucinations detected with pattern matching")
        }

        return filteredText
    }
}