import Foundation

/// Turns the spoken layout commands "new line" and "new paragraph" in a
/// transcription into real line breaks.
///
/// The transcription pipeline applies this to the text returned by
/// `WhisperHallucinationFilter.filter`.
struct WhisperTextFormatter {
    /// Ordered regex rewrites applied by `format(_:)`; every rule is matched
    /// case-insensitively.
    ///
    /// Each command is wrapped in `\b` word boundaries so that ordinary words
    /// which merely contain the phrase ("renew line", "new lines",
    /// "new paragraphs") are left untouched.
    private static let rewriteRules: [(pattern: String, replacement: String)] = [
        // A comma directly before a command ends the preceding sentence, so it
        // becomes a period. The command itself is consumed by the rules below.
        (pattern: #",\s*\bnew\s+paragraph\b"#, replacement: ". new paragraph"),
        (pattern: #",\s*\bnew\s+line\b"#, replacement: ". new line"),

        // "new line" is handled before "new paragraph" on purpose: the
        // surrounding `\s*` also matches line breaks, so running the paragraph
        // rule last lets it absorb an adjacent single break instead of having
        // its own double break swallowed by the line rule.
        (pattern: #"\s*\bnew\s+line\b\s*[,.!?]?\s*"#, replacement: "\n"),
        (pattern: #"\s*\bnew\s+paragraph\b\s*[,.!?]?\s*"#, replacement: "\n\n"),

        // Never leave more than one blank line between paragraphs.
        (pattern: #"\n{3,}"#, replacement: "\n\n"),
    ]

    /// Replaces dictated "new line" / "new paragraph" commands with line breaks.
    ///
    /// - Parameter text: The transcription to format.
    /// - Returns: `text` with each "new line" command (plus surrounding
    ///   whitespace and one optional trailing `,`, `.`, `!` or `?`) replaced by
    ///   `"\n"`, each "new paragraph" command replaced by `"\n\n"`, a comma
    ///   immediately preceding a command turned into a period, and runs of three
    ///   or more newlines collapsed to two.
    static func format(_ text: String) -> String {
        rewriteRules.reduce(text) { partial, rule in
            partial.replacingOccurrences(
                of: rule.pattern,
                with: rule.replacement,
                options: [.regularExpression, .caseInsensitive]
            )
        }
    }
}