Add support for new line and paragraph

This commit is contained in:
Beingpax 2025-05-10 10:29:58 +05:45
parent 6484b7075d
commit 3b385f44f4
2 changed files with 52 additions and 1 deletions

View File

@ -100,7 +100,10 @@ actor WhisperContext {
transcription += String(cString: whisper_full_get_segment_text(context, i))
}
// Apply hallucination filtering
return WhisperHallucinationFilter.filter(transcription)
let filteredTranscription = WhisperHallucinationFilter.filter(transcription)
// Always apply text formatting
return WhisperTextFormatter.format(filteredTranscription)
}
static func createContext(path: String) async throws -> WhisperContext {

View File

@ -0,0 +1,48 @@
import Foundation
/// Turns the spoken commands "new line" and "new paragraph" inside transcribed
/// text into real line breaks.
///
/// Matching is case-insensitive and limited to whole words, so phrases such as
/// "renew line" or "new lines" are left untouched.
struct WhisperTextFormatter {
    /// Replaces spoken line-break commands in `text` with newline characters.
    ///
    /// - "new line" becomes a single line break, "new paragraph" a blank line.
    /// - Whitespace around a command and one trailing `,`, `.`, `!` or `?` are
    ///   removed together with the command.
    /// - A comma directly in front of a command is turned into a period so the
    ///   preceding sentence is terminated properly.
    /// - Runs of three or more line breaks are collapsed to a single blank line.
    ///
    /// - Parameter text: The transcription to format.
    /// - Returns: The text with the commands replaced by line breaks.
    static func format(_ text: String) -> String {
        let commandOptions: String.CompareOptions = [.regularExpression, .caseInsensitive]

        // Order matters: every command pattern also swallows the whitespace around
        // it, including line breaks produced by an earlier pass. Handling
        // "new line" first means a neighbouring "new paragraph" can only absorb a
        // single break (which the final clean-up would have collapsed anyway),
        // instead of a paragraph break being shrunk to a single line break.
        let commands: [(phrase: String, lineBreak: String)] = [
            (phrase: "\\bnew\\s+line\\b", lineBreak: "\n"),
            (phrase: "\\bnew\\s+paragraph\\b", lineBreak: "\n\n")
        ]

        var formattedText = text

        // A comma right before a command ends the sentence: make it a period.
        // The look-ahead leaves the command itself in place for the next step.
        for command in commands {
            formattedText = formattedText.replacingOccurrences(
                of: ",(?=\\s*\(command.phrase))",
                with: ".",
                options: commandOptions
            )
        }

        // Swap each command, its surrounding whitespace and at most one trailing
        // punctuation mark for the matching line break.
        for command in commands {
            formattedText = formattedText.replacingOccurrences(
                of: "\\s*\(command.phrase)\\s*[,.!?]?\\s*",
                with: command.lineBreak,
                options: commandOptions
            )
        }

        // Never leave more than one blank line in a row.
        return formattedText.replacingOccurrences(
            of: "\n{3,}",
            with: "\n\n",
            options: .regularExpression
        )
    }
}