diff --git a/VoiceInk/Services/AudioTranscriptionManager.swift b/VoiceInk/Services/AudioTranscriptionManager.swift index 6c6aafb..df50553 100644 --- a/VoiceInk/Services/AudioTranscriptionManager.swift +++ b/VoiceInk/Services/AudioTranscriptionManager.swift @@ -90,6 +90,7 @@ class AudioTranscriptionManager: ObservableObject { try await whisperContext?.fullTranscribe(samples: samples) var text = await whisperContext?.getTranscription() ?? "" text = text.trimmingCharacters(in: .whitespacesAndNewlines) + text = WhisperTextFormatter.format(text) // Apply word replacements if enabled if UserDefaults.standard.bool(forKey: "IsWordReplacementEnabled") { diff --git a/VoiceInk/Whisper/LibWhisper.swift b/VoiceInk/Whisper/LibWhisper.swift index 684a013..547159d 100644 --- a/VoiceInk/Whisper/LibWhisper.swift +++ b/VoiceInk/Whisper/LibWhisper.swift @@ -85,7 +85,7 @@ actor WhisperContext { let langId = whisper_full_lang_id(context) let detectedLang = String(cString: whisper_lang_str(langId)) logger.notice("✅ Transcription completed - Language: \(detectedLang)") - whisper_print_timings(context) + } } @@ -102,8 +102,7 @@ actor WhisperContext { // Apply hallucination filtering let filteredTranscription = WhisperHallucinationFilter.filter(transcription) - // Always apply text formatting - return WhisperTextFormatter.format(filteredTranscription) + return filteredTranscription } static func createContext(path: String) async throws -> WhisperContext { diff --git a/VoiceInk/Whisper/WhisperTextFormatter.swift b/VoiceInk/Whisper/WhisperTextFormatter.swift index 8944f22..235977b 100644 --- a/VoiceInk/Whisper/WhisperTextFormatter.swift +++ b/VoiceInk/Whisper/WhisperTextFormatter.swift @@ -1,59 +1,123 @@ import Foundation +import NaturalLanguage struct WhisperTextFormatter { static func format(_ text: String) -> String { - var formattedText = text + let TARGET_WORD_COUNT = 30 + let MAX_SENTENCES_PER_CHUNK = 4 + let MIN_WORDS_FOR_SIGNIFICANT_SENTENCE = 3 + + var finalFormattedText = "" - // Handle single-word variants - let singleWordPatterns = [ - (pattern: "\\b(newline)\\b", replacement: "new line"), - (pattern: "\\b(newparagraph)\\b", replacement: "new paragraph") - ] + // Attempt to detect the language of the input text + let detectedLanguage = NLLanguageRecognizer.dominantLanguage(for: text) + let tokenizerLanguage = detectedLanguage ?? .english // Fallback to English if detection fails - for (pattern, replacement) in singleWordPatterns { - formattedText = formattedText.replacingOccurrences( - of: pattern, - with: replacement, - options: [.regularExpression, .caseInsensitive] - ) + let sentenceTokenizer = NLTokenizer(unit: .sentence) + sentenceTokenizer.string = text + sentenceTokenizer.setLanguage(tokenizerLanguage) + + var allSentencesFromInput = [String]() + sentenceTokenizer.enumerateTokens(in: text.startIndex..= MIN_WORDS_FOR_SIGNIFICANT_SENTENCE { + currentChunkSignificantSentenceCount += 1 + } + + if currentChunkWordCount >= TARGET_WORD_COUNT { + break // Word target met for this tentative chunk + } + } + + // Now, apply MAX_SENTENCES_PER_CHUNK rule based on significant sentences + var sentencesForThisFinalChunk = [String]() + if currentChunkSignificantSentenceCount > MAX_SENTENCES_PER_CHUNK { + var significantSentencesCountedInTrim = 0 + for sentenceInTentativeChunk in currentChunkTentativeSentences { + sentencesForThisFinalChunk.append(sentenceInTentativeChunk) + + // Re-check if this sentence was significant to count towards the cap + let wordTokenizerForTrimCheck = NLTokenizer(unit: .word) + wordTokenizerForTrimCheck.string = sentenceInTentativeChunk + wordTokenizerForTrimCheck.setLanguage(tokenizerLanguage) + var wordsInCurrentSentenceForTrim = 0 + wordTokenizerForTrimCheck.enumerateTokens(in: sentenceInTentativeChunk.startIndex..= MIN_WORDS_FOR_SIGNIFICANT_SENTENCE { + significantSentencesCountedInTrim += 1 + if significantSentencesCountedInTrim >= MAX_SENTENCES_PER_CHUNK { + break // Reached the cap of significant sentences for this chunk + } + } + } + } else { + sentencesForThisFinalChunk = currentChunkTentativeSentences + } + + if !sentencesForThisFinalChunk.isEmpty { + let segmentStringToAppend = sentencesForThisFinalChunk.joined(separator: " ") + + if !finalFormattedText.isEmpty { + finalFormattedText += "\n\n" + } + finalFormattedText += segmentStringToAppend + + processedSentenceGlobalIndex += sentencesForThisFinalChunk.count + } else { + // Safeguard: if no sentences ended up in the final chunk (e.g. all input was processed) + // or if currentChunkTentativeSentences was empty (should be caught by outer loop condition) + // This ensures we don't loop infinitely if something unexpected happens. + if processedSentenceGlobalIndex >= allSentencesFromInput.count && currentChunkTentativeSentences.isEmpty { + break // All input processed + } else if sentencesForThisFinalChunk.isEmpty && !currentChunkTentativeSentences.isEmpty { + // This implies currentChunkTentativeSentences had items but trimming resulted in zero items for final chunk + // which is unlikely with the logic, but as a safety, advance by what was considered. + processedSentenceGlobalIndex += currentChunkTentativeSentences.count + } else if sentencesForThisFinalChunk.isEmpty && currentChunkTentativeSentences.isEmpty && processedSentenceGlobalIndex < allSentencesFromInput.count { + // No sentences in tentative, means loop above didn't run, implies processedSentenceGlobalIndex needs to catch up or something is wrong + processedSentenceGlobalIndex = allSentencesFromInput.count // Mark as processed to exit + break; + } + else if sentencesForThisFinalChunk.isEmpty { // General catch-all if empty for other reasons + break + } + } } - // Then handle the new line/paragraph commands with any combination of spaces and punctuation - let patterns = [ - // Handle "new paragraph" with any combination of spaces and punctuation - (pattern: "\\s*new\\s+paragraph\\s*[,.!?]?\\s*", replacement: "\n\n"), - // Handle "new line" with any combination of spaces and punctuation - (pattern: "\\s*new\\s+line\\s*[,.!?]?\\s*", replacement: "\n") - ] - - for (pattern, replacement) in patterns { - formattedText = formattedText.replacingOccurrences( - of: pattern, - with: replacement, - options: [.regularExpression, .caseInsensitive] - ) - } - - // Clean up any multiple consecutive newlines (more than 2) - formattedText = formattedText.replacingOccurrences( - of: "\n{3,}", - with: "\n\n", - options: .regularExpression - ) - - return formattedText + return finalFormattedText.trimmingCharacters(in: .whitespacesAndNewlines) } } \ No newline at end of file