Fix OCR capturing VoiceInk status overlay instead of frontmost app window
The screen capture service selected the first layer-0 window it found, which during recording was VoiceInk's own status indicator overlay. Because the overlay has no readable content, OCR always returned 'No text detected'.

Changes:
- Filter out windows owned by VoiceInk's own process
- Prioritize windows belonging to NSWorkspace.frontmostApplication
- Filter out tiny windows (width or height below 120) to avoid tooltips and overlays
- Move the CGWindowListCopyWindowInfo call off the main thread for better UI responsiveness
- Move the WindowCandidate struct to class scope
This commit is contained in:
parent
557ac95478
commit
3b043f4da9
@ -14,27 +14,74 @@ class ScreenCaptureService: ObservableObject {
|
|||||||
category: "aienhancement"
|
category: "aienhancement"
|
||||||
)
|
)
|
||||||
|
|
||||||
private func getActiveWindowInfo() -> (title: String, ownerName: String, windowID: CGWindowID)? {
|
private struct WindowCandidate {
|
||||||
let windowListInfo = CGWindowListCopyWindowInfo([.optionOnScreenOnly], kCGNullWindowID) as? [[String: Any]] ?? []
|
let title: String
|
||||||
|
let ownerName: String
|
||||||
|
let windowID: CGWindowID
|
||||||
|
let ownerPID: pid_t
|
||||||
|
let layer: Int32
|
||||||
|
let bounds: CGRect
|
||||||
|
}
|
||||||
|
|
||||||
if let frontWindow = windowListInfo.first(where: { info in
|
private func getActiveWindowInfo() async -> (title: String, ownerName: String, windowID: CGWindowID)? {
|
||||||
let layer = info[kCGWindowLayer as String] as? Int32 ?? 0
|
let currentPID = ProcessInfo.processInfo.processIdentifier
|
||||||
return layer == 0
|
let frontmostPID = await NSWorkspace.shared.frontmostApplication?.processIdentifier
|
||||||
}) {
|
|
||||||
guard let windowID = frontWindow[kCGWindowNumber as String] as? CGWindowID,
|
// Move expensive window list retrieval off the main thread
|
||||||
let ownerName = frontWindow[kCGWindowOwnerName as String] as? String,
|
let candidates = await Task.detached(priority: .userInitiated) { () -> [WindowCandidate] in
|
||||||
let title = frontWindow[kCGWindowName as String] as? String else {
|
let windowListInfo = CGWindowListCopyWindowInfo([.optionOnScreenOnly], kCGNullWindowID) as? [[String: Any]] ?? []
|
||||||
return nil
|
|
||||||
|
return windowListInfo.compactMap { info -> WindowCandidate? in
|
||||||
|
guard let windowID = info[kCGWindowNumber as String] as? CGWindowID,
|
||||||
|
let ownerName = info[kCGWindowOwnerName as String] as? String,
|
||||||
|
let ownerPIDNumber = info[kCGWindowOwnerPID as String] as? NSNumber,
|
||||||
|
let layer = info[kCGWindowLayer as String] as? Int32,
|
||||||
|
let boundsDict = info[kCGWindowBounds as String] as? [String: Any],
|
||||||
|
let width = boundsDict["Width"] as? CGFloat,
|
||||||
|
let height = boundsDict["Height"] as? CGFloat else {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
let rawTitle = (info[kCGWindowName as String] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||||
|
let resolvedTitle = rawTitle?.isEmpty == false ? rawTitle! : ownerName
|
||||||
|
let bounds = CGRect(
|
||||||
|
x: boundsDict["X"] as? CGFloat ?? 0,
|
||||||
|
y: boundsDict["Y"] as? CGFloat ?? 0,
|
||||||
|
width: width,
|
||||||
|
height: height
|
||||||
|
)
|
||||||
|
|
||||||
|
return WindowCandidate(
|
||||||
|
title: resolvedTitle,
|
||||||
|
ownerName: ownerName,
|
||||||
|
windowID: windowID,
|
||||||
|
ownerPID: ownerPIDNumber.int32Value,
|
||||||
|
layer: layer,
|
||||||
|
bounds: bounds
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
}.value
|
||||||
|
|
||||||
return (title: title, ownerName: ownerName, windowID: windowID)
|
func isEligible(_ candidate: WindowCandidate) -> Bool {
|
||||||
|
guard candidate.layer == 0 else { return false }
|
||||||
|
guard candidate.ownerPID != currentPID else { return false }
|
||||||
|
return candidate.bounds.width >= 120 && candidate.bounds.height >= 120
|
||||||
|
}
|
||||||
|
|
||||||
|
if let frontmostPID = frontmostPID,
|
||||||
|
let focusedWindow = candidates.first(where: { isEligible($0) && $0.ownerPID == frontmostPID }) {
|
||||||
|
return (title: focusedWindow.title, ownerName: focusedWindow.ownerName, windowID: focusedWindow.windowID)
|
||||||
|
}
|
||||||
|
|
||||||
|
if let fallbackWindow = candidates.first(where: isEligible) {
|
||||||
|
return (title: fallbackWindow.title, ownerName: fallbackWindow.ownerName, windowID: fallbackWindow.windowID)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func captureActiveWindow() async -> NSImage? {
|
func captureActiveWindow() async -> NSImage? {
|
||||||
guard let windowInfo = getActiveWindowInfo() else {
|
guard let windowInfo = await getActiveWindowInfo() else {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -111,7 +158,7 @@ class ScreenCaptureService: ObservableObject {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
guard let windowInfo = getActiveWindowInfo() else {
|
guard let windowInfo = await getActiveWindowInfo() else {
|
||||||
logger.notice("📸 No active window found")
|
logger.notice("📸 No active window found")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user