feat: per-window detection — only flash windows needing attention

Detector now walks each AXWindow subtree independently and returns
both aggregate signals (for the state machine) and a list of AXWindow
element refs for windows with active approval signals.

Overlay reads position/size directly from AXWindow elements via
AXValueGetValue, eliminating the CGWindowList dependency (which
returned empty names for Electron windows anyway).

Daemon passes only the active AXWindow refs to the overlay, so
only the specific window(s) waiting for user input get flashed.

Made-with: Cursor
This commit is contained in:
cottongin
2026-03-10 02:54:15 -04:00
parent bce6ec39f8
commit b31f39268e
5 changed files with 152 additions and 78 deletions

View File

@@ -6,16 +6,21 @@ Detection strategy (based on a11y tree analysis):
NOT as AXButton elements (those are native window controls only).
- We collect both AXStaticText values and AXButton titles, then match against
known keywords for "agent working" and "approval needed" states.
- Detection is per-window: each AXWindow subtree is scanned independently
so only the windows actually needing attention get flashed.
"""
from dataclasses import dataclass
from dataclasses import dataclass, field
import re
from ApplicationServices import (
AXUIElementCreateApplication,
AXUIElementCopyAttributeNames,
AXUIElementCopyAttributeValue,
AXValueGetValue,
kAXValueTypeCGPoint,
kAXValueTypeCGSize,
)
from Cocoa import NSWorkspace
from Cocoa import NSScreen, NSWorkspace
CURSOR_BUNDLE_ID = "com.todesktop.230313mzl4w4u92"
@@ -35,6 +40,13 @@ class UISignals:
approval_needed: bool = False
@dataclass
class PollResult:
    """Outcome of a single poll of Cursor's a11y tree.

    Attributes:
        signals: UI signals aggregated over the scanned windows.
        active_windows: AXWindow element refs collected during the poll;
            defaults to a fresh empty list for every instance.
    """

    signals: UISignals
    active_windows: list = field(default_factory=list)
def _text_matches(text: str, exact_set: set[str], patterns: list[re.Pattern]) -> bool:
if text in exact_set:
return True
@@ -71,8 +83,13 @@ class CursorDetector:
def __init__(self):
self._pid: int | None = None
def poll(self) -> UISignals | None:
"""Poll Cursor's a11y tree and return detected signals, or None if Cursor isn't running."""
def poll(self) -> PollResult | None:
"""Poll Cursor's a11y tree per-window.
Returns aggregate signals for the state machine and a list of
AXWindow element refs for windows that need user attention.
Returns None if Cursor isn't running.
"""
pid = self._find_cursor_pid()
if pid is None:
self._pid = None
@@ -80,8 +97,38 @@ class CursorDetector:
self._pid = pid
app_element = AXUIElementCreateApplication(pid)
elements = self._collect_elements(app_element, max_depth=15)
return parse_ui_signals(elements)
err, children = AXUIElementCopyAttributeValue(
app_element, "AXChildren", None
)
if err or not children:
return PollResult(signals=UISignals())
aggregate_working = False
aggregate_approval = False
active_windows: list = []
for child in children:
err, role = AXUIElementCopyAttributeValue(child, "AXRole", None)
if err or str(role) != "AXWindow":
continue
elements = self._collect_elements(child, max_depth=15)
signals = parse_ui_signals(elements)
if signals.agent_working:
aggregate_working = True
if signals.approval_needed:
aggregate_approval = True
active_windows.append(child)
return PollResult(
signals=UISignals(
agent_working=aggregate_working,
approval_needed=aggregate_approval,
),
active_windows=active_windows,
)
def _find_cursor_pid(self) -> int | None:
workspace = NSWorkspace.sharedWorkspace()
@@ -129,3 +176,28 @@ class CursorDetector:
results.extend(self._collect_elements(child, max_depth, depth + 1))
return results
def get_ax_window_frame(ax_window) -> tuple | None:
    """Extract an AXWindow's screen frame as an NS-coordinate tuple.

    The accessibility API reports AXPosition relative to the top-left
    corner of the primary display, while AppKit uses a bottom-left origin
    on that same display. The Y axis is therefore flipped against the
    height of the primary screen (``NSScreen.screens()[0]``), not
    ``NSScreen.mainScreen()``, which is the screen holding the key window
    and gives wrong results on multi-monitor setups.

    Args:
        ax_window: AXUIElement ref for the window to measure.

    Returns:
        ``((x, y), (w, h))`` in AppKit coordinates (bottom-left origin),
        or ``None`` if the attributes can't be read or no screen is
        available.
    """
    pos_err, pos_val = AXUIElementCopyAttributeValue(
        ax_window, "AXPosition", None
    )
    size_err, size_val = AXUIElementCopyAttributeValue(
        ax_window, "AXSize", None
    )
    # A non-zero AXError means the attribute is missing or the window has
    # gone away; treat it like an unreadable value.
    if pos_err or size_err or pos_val is None or size_val is None:
        return None

    # AXValueGetValue reports success via its boolean result.
    point_ok, point = AXValueGetValue(pos_val, kAXValueTypeCGPoint, None)
    size_ok, size = AXValueGetValue(size_val, kAXValueTypeCGSize, None)
    if not point_ok or not size_ok or point is None or size is None:
        return None

    screens = NSScreen.screens()
    if not screens:
        # No displays attached (e.g. headless session): nothing to map to.
        return None
    # Index 0 is the primary screen, whose origin is (0, 0) in both
    # coordinate systems, so its height is the correct flip reference.
    primary_height = screens[0].frame().size.height

    x = point.x
    w = size.width
    h = size.height
    # Convert the top-left-origin Y of the window's top edge into the
    # bottom-left-origin Y of its bottom edge.
    y = primary_height - point.y - h
    return ((x, y), (w, h))