Add support for new line and paragraph
This commit is contained in:
parent
6484b7075d
commit
3b385f44f4
@ -100,7 +100,10 @@ actor WhisperContext {
|
||||
transcription += String(cString: whisper_full_get_segment_text(context, i))
|
||||
}
|
||||
// Apply hallucination filtering
|
||||
return WhisperHallucinationFilter.filter(transcription)
|
||||
let filteredTranscription = WhisperHallucinationFilter.filter(transcription)
|
||||
|
||||
// Always apply text formatting
|
||||
return WhisperTextFormatter.format(filteredTranscription)
|
||||
}
|
||||
|
||||
static func createContext(path: String) async throws -> WhisperContext {
|
||||
|
||||
48
VoiceInk/Whisper/WhisperTextFormatter.swift
Normal file
48
VoiceInk/Whisper/WhisperTextFormatter.swift
Normal file
@ -0,0 +1,48 @@
|
||||
import Foundation
|
||||
|
||||
/// Turns the spoken formatting commands "new line" and "new paragraph" in a
/// transcription into real line breaks.
///
/// Matching is case-insensitive and anchored on word boundaries, so the
/// command words are only recognised as whole words ("renew line" and
/// "new linear" are left untouched).
struct WhisperTextFormatter {
    /// A regular-expression pattern paired with the text that replaces each match.
    private typealias Rule = (pattern: String, replacement: String)

    /// Rewrites a comma that directly precedes a command into a period, so the
    /// sentence before the break ends with ". " instead of a dangling ",".
    private static let commaRules: [Rule] = [
        (pattern: #",\s*new\s+paragraph\b"#, replacement: ". new paragraph"),
        (pattern: #",\s*new\s+line\b"#, replacement: ". new line")
    ]

    /// Replaces each command, together with the whitespace around it and at most
    /// one trailing punctuation mark (`,`, `.`, `!` or `?`), with newline characters.
    private static let commandRules: [Rule] = [
        (pattern: #"\s*\bnew\s+paragraph\b\s*[,.!?]?\s*"#, replacement: "\n\n"),
        (pattern: #"\s*\bnew\s+line\b\s*[,.!?]?\s*"#, replacement: "\n")
    ]

    /// Applies the spoken-command formatting rules to `text`.
    ///
    /// - Parameter text: The transcription to format.
    /// - Returns: `text` with every "new line" replaced by one newline, every
    ///   "new paragraph" replaced by two newlines, and any run of three or more
    ///   newlines collapsed to exactly two.
    static func format(_ text: String) -> String {
        // Order matters: commas must become periods before the commands
        // themselves are consumed by the second rule set.
        let withPeriods = applying(commaRules, to: text)
        let withBreaks = applying(commandRules, to: withPeriods)

        // Consecutive commands can stack up newlines; cap them at one blank line.
        return withBreaks.replacingOccurrences(
            of: "\n{3,}",
            with: "\n\n",
            options: .regularExpression
        )
    }

    /// Runs every rule over `text` in order, case-insensitively, and returns the result.
    private static func applying(_ rules: [Rule], to text: String) -> String {
        return rules.reduce(text) { partial, rule in
            partial.replacingOccurrences(
                of: rule.pattern,
                with: rule.replacement,
                options: [.regularExpression, .caseInsensitive]
            )
        }
    }
}
|
||||
Loading…
x
Reference in New Issue
Block a user