diff --git a/README.md b/README.md index 8aa8e1f3..46ff5ed2 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,8 @@ agent-device press 300 500 --count 12 --interval-ms 45 agent-device press 300 500 --count 6 --hold-ms 120 --interval-ms 30 --jitter-px 2 agent-device press @e5 --count 5 --double-tap agent-device swipe 540 1500 540 500 120 --count 8 --pause-ms 30 --pattern ping-pong +agent-device scrollintoview "Sign in" +agent-device scrollintoview @e42 ``` ## Command Index @@ -180,6 +182,7 @@ Swipe timing: - `swipe` accepts optional `durationMs` (default `250`, range `16..10000`). - Android uses requested swipe duration directly. - iOS uses a safe normalized duration to avoid longpress side effects. +- `scrollintoview` accepts either plain text or a snapshot ref (`@eN`); ref mode uses geometry-based scrolling. ## Skills Install the automation skills listed in [SKILL.md](skills/agent-device/SKILL.md). diff --git a/src/daemon/__tests__/scroll-planner.test.ts b/src/daemon/__tests__/scroll-planner.test.ts new file mode 100644 index 00000000..dd6cb4c3 --- /dev/null +++ b/src/daemon/__tests__/scroll-planner.test.ts @@ -0,0 +1,47 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { type RawSnapshotNode } from '../../utils/snapshot.ts'; +import { + buildScrollIntoViewPlan, + isRectWithinSafeViewportBand, + resolveViewportRect, +} from '../scroll-planner.ts'; + +function makeNode(index: number, type: string, rect?: RawSnapshotNode['rect']): RawSnapshotNode { + return { index, type, rect }; +} + +test('resolveViewportRect picks containing application/window viewport', () => { + const targetRect = { x: 20, y: 1700, width: 120, height: 40 }; + const nodes: RawSnapshotNode[] = [ + makeNode(0, 'Application', { x: 0, y: 0, width: 390, height: 844 }), + makeNode(1, 'Window', { x: 0, y: 0, width: 390, height: 844 }), + makeNode(2, 'Cell', targetRect), + ]; + const viewport = resolveViewportRect(nodes, targetRect); + assert.deepEqual(viewport, { x: 0, y: 0, width: 390, height: 844 }); +}); + +test('resolveViewportRect returns null when no valid viewport can be inferred', () => { + const targetRect = { x: 20, y: 100, width: 120, height: 40 }; + const nodes: RawSnapshotNode[] = [makeNode(0, 'Cell', undefined)]; + const viewport = resolveViewportRect(nodes, targetRect); + assert.equal(viewport, null); +}); + +test('buildScrollIntoViewPlan computes downward content scroll when target is below safe band', () => { + const targetRect = { x: 20, y: 2100, width: 120, height: 40 }; + const viewportRect = { x: 0, y: 0, width: 390, height: 844 }; + const plan = buildScrollIntoViewPlan(targetRect, viewportRect); + assert.ok(plan); + assert.equal(plan?.direction, 'down'); + assert.ok((plan?.count ?? 0) > 1); +}); + +test('buildScrollIntoViewPlan returns null when already in safe viewport band', () => { + const targetRect = { x: 20, y: 320, width: 120, height: 40 }; + const viewportRect = { x: 0, y: 0, width: 390, height: 844 }; + const plan = buildScrollIntoViewPlan(targetRect, viewportRect); + assert.equal(plan, null); + assert.equal(isRectWithinSafeViewportBand(targetRect, viewportRect), true); +}); diff --git a/src/daemon/handlers/__tests__/interaction.test.ts b/src/daemon/handlers/__tests__/interaction.test.ts index 04abf1d5..f3059ac4 100644 --- a/src/daemon/handlers/__tests__/interaction.test.ts +++ b/src/daemon/handlers/__tests__/interaction.test.ts @@ -185,3 +185,186 @@ test('press coordinates does not treat extra trailing args as selector', async ( assert.deepEqual(dispatchCalls[0]?.positionals, ['100', '200']); assert.equal(sessionStore.get(sessionName)?.actions.length, 1); }); + +test('scrollintoview @ref dispatches geometry-based swipe series', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'default'; + const session = makeSession(sessionName); + session.snapshot = { + nodes: attachRefs([ + { + index: 0, + type: 'Application', + rect: { x: 0, y: 0, width: 390, height: 844 }, + }, + { + index: 1, + type: 'XCUIElementTypeStaticText', + label: 'Far item', + rect: { x: 20, y: 2600, width: 120, height: 40 }, + }, + ]), + createdAt: Date.now(), + backend: 'xctest', + }; + sessionStore.set(sessionName, session); + + const dispatchCalls: Array<{ + command: string; + positionals: string[]; + context: Record | undefined; + }> = []; + let snapshotCallCount = 0; + const response = await handleInteractionCommands({ + req: { + token: 't', + session: sessionName, + command: 'scrollintoview', + positionals: ['@e2'], + flags: {}, + }, + sessionName, + sessionStore, + contextFromFlags, + dispatch: async (_device, command, positionals, _out, context) => { + if (command === 'snapshot') { + snapshotCallCount += 1; + return { + nodes: [ + { index: 0, type: 'Application', rect: { x: 0, y: 0, width: 390, height: 844 } }, + { index: 1, type: 'XCUIElementTypeStaticText', label: 'Far item', rect: { x: 20, y: 320, width: 120, height: 40 } }, + ], + backend: 'xctest', + }; + } + dispatchCalls.push({ command, positionals, context: context as Record | undefined }); + return { ok: true }; + }, + }); + + assert.ok(response); + assert.equal(response.ok, true); + assert.equal(snapshotCallCount, 1); + assert.equal(dispatchCalls.length, 1); + assert.equal(dispatchCalls[0]?.command, 'swipe'); + assert.equal(dispatchCalls[0]?.positionals.length, 5); + assert.equal(dispatchCalls[0]?.context?.pattern, 'one-way'); + assert.equal(dispatchCalls[0]?.context?.pauseMs, 0); + assert.equal(typeof dispatchCalls[0]?.context?.count, 'number'); + assert.ok((dispatchCalls[0]?.context?.count as number) > 1); + + const stored = sessionStore.get(sessionName); + assert.ok(stored); + assert.equal(stored?.actions.length, 1); + assert.equal(stored?.actions[0]?.command, 'scrollintoview'); + const result = (stored?.actions[0]?.result ?? {}) as Record; + assert.equal(result.ref, 'e2'); + assert.equal(result.strategy, 'ref-geometry'); + assert.equal(result.verified, true); +}); + +test('scrollintoview @ref returns immediately when target is already in viewport safe band', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'default'; + const session = makeSession(sessionName); + session.snapshot = { + nodes: attachRefs([ + { + index: 0, + type: 'Application', + rect: { x: 0, y: 0, width: 390, height: 844 }, + }, + { + index: 1, + type: 'XCUIElementTypeStaticText', + label: 'Visible item', + rect: { x: 20, y: 320, width: 120, height: 40 }, + }, + ]), + createdAt: Date.now(), + backend: 'xctest', + }; + sessionStore.set(sessionName, session); + + const dispatchCalls: Array<{ command: string }> = []; + const response = await handleInteractionCommands({ + req: { + token: 't', + session: sessionName, + command: 'scrollintoview', + positionals: ['@e2'], + flags: {}, + }, + sessionName, + sessionStore, + contextFromFlags, + dispatch: async (_device, command) => { + dispatchCalls.push({ command }); + return { ok: true }; + }, + }); + + assert.ok(response); + assert.equal(response.ok, true); + assert.equal(dispatchCalls.length, 0); + if (response.ok) { + assert.equal(response.data?.attempts, 0); + assert.equal(response.data?.alreadyVisible, true); + } +}); + +test('scrollintoview @ref fails if target remains outside viewport after scroll', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'default'; + const session = makeSession(sessionName); + session.snapshot = { + nodes: attachRefs([ + { + index: 0, + type: 'Application', + rect: { x: 0, y: 0, width: 390, height: 844 }, + }, + { + index: 1, + type: 'XCUIElementTypeStaticText', + label: 'Far item', + rect: { x: 20, y: 2600, width: 120, height: 40 }, + }, + ]), + createdAt: Date.now(), + backend: 'xctest', + }; + sessionStore.set(sessionName, session); + + const response = await handleInteractionCommands({ + req: { + token: 't', + session: sessionName, + command: 'scrollintoview', + positionals: ['@e2'], + flags: {}, + }, + sessionName, + sessionStore, + contextFromFlags, + dispatch: async (_device, command) => { + if (command === 'snapshot') { + return { + nodes: [ + { index: 0, type: 'Application', rect: { x: 0, y: 0, width: 390, height: 844 } }, + { index: 1, type: 'XCUIElementTypeStaticText', label: 'Far item', rect: { x: 20, y: 2600, width: 120, height: 40 } }, + ], + backend: 'xctest', + }; + } + return { ok: true }; + }, + }); + + assert.ok(response); + assert.equal(response.ok, false); + if (!response.ok) { + assert.equal(response.error?.code, 'COMMAND_FAILED'); + assert.match(response.error?.message ?? '', /outside viewport/i); + } +}); diff --git a/src/daemon/handlers/interaction.ts b/src/daemon/handlers/interaction.ts index 309adac5..a3286806 100644 --- a/src/daemon/handlers/interaction.ts +++ b/src/daemon/handlers/interaction.ts @@ -1,6 +1,13 @@ import { dispatchCommand, type CommandFlags } from '../../core/dispatch.ts'; import { isCommandSupportedOnDevice } from '../../core/capabilities.ts'; -import { attachRefs, centerOfRect, findNodeByRef, normalizeRef, type RawSnapshotNode } from '../../utils/snapshot.ts'; +import { + attachRefs, + centerOfRect, + findNodeByRef, + normalizeRef, + type RawSnapshotNode, + type SnapshotNode, +} from '../../utils/snapshot.ts'; import type { DaemonCommandContext } from '../context.ts'; import type { DaemonRequest, DaemonResponse, SessionState } from '../types.ts'; import { SessionStore } from '../session-store.ts'; @@ -16,6 +23,7 @@ import { splitSelectorFromArgs, } from '../selectors.ts'; import { withDiagnosticTimer } from '../../utils/diagnostics.ts'; +import { buildScrollIntoViewPlan, isRectWithinSafeViewportBand, resolveViewportRect } from '../scroll-planner.ts'; type ContextFromFlags = ( flags: CommandFlags | undefined, @@ -67,30 +75,24 @@ export async function handleInteractionCommands(params: { if (refInput.startsWith('@')) { const invalidRefFlagsResponse = refSnapshotFlagGuardResponse('press', req.flags); if (invalidRefFlagsResponse) return invalidRefFlagsResponse; - if (!session.snapshot) { - return { ok: false, error: { code: 'INVALID_ARGS', message: 'No snapshot in session. Run snapshot first.' } }; - } - const ref = normalizeRef(refInput); - if (!ref) { - return { - ok: false, - error: { code: 'INVALID_ARGS', message: `${command} requires a ref like @e2` }, - }; - } - let node = findNodeByRef(session.snapshot.nodes, ref); - if (!node?.rect && req.positionals.length > 1) { - const fallbackLabel = req.positionals.slice(1).join(' ').trim(); - if (fallbackLabel.length > 0) { - node = findNodeByLabel(session.snapshot.nodes, fallbackLabel); - } - } - if (!node?.rect) { + const fallbackLabel = req.positionals.length > 1 ? req.positionals.slice(1).join(' ').trim() : ''; + const resolvedRefTarget = resolveRefTarget({ + session, + refInput, + fallbackLabel, + requireRect: true, + invalidRefMessage: `${command} requires a ref like @e2`, + notFoundMessage: `Ref ${refInput} not found or has no bounds`, + }); + if (!resolvedRefTarget.ok) return resolvedRefTarget.response; + const { ref, node, snapshotNodes } = resolvedRefTarget.target; + if (!node.rect) { return { ok: false, error: { code: 'COMMAND_FAILED', message: `Ref ${refInput} not found or has no bounds` }, }; } - const refLabel = resolveRefLabel(node, session.snapshot.nodes); + const refLabel = resolveRefLabel(node, snapshotNodes); const selectorChain = buildSelectorChainForNode(node, session.device.platform, { action: selectorAction }); const { x, y } = centerOfRect(node.rect); const data = await dispatch(session.device, 'press', [String(x), String(y)], req.flags?.out, { @@ -165,25 +167,30 @@ export async function handleInteractionCommands(params: { if (command === 'fill') { const session = sessionStore.get(sessionName); if (req.positionals?.[0]?.startsWith('@')) { + if (!session) { + return { + ok: false, + error: { code: 'SESSION_NOT_FOUND', message: 'No active session. Run open first.' }, + }; + } const invalidRefFlagsResponse = refSnapshotFlagGuardResponse('fill', req.flags); if (invalidRefFlagsResponse) return invalidRefFlagsResponse; - if (!session?.snapshot) { - return { ok: false, error: { code: 'INVALID_ARGS', message: 'No snapshot in session. Run snapshot first.' } }; - } - const ref = normalizeRef(req.positionals[0]); - if (!ref) { - return { ok: false, error: { code: 'INVALID_ARGS', message: 'fill requires a ref like @e2' } }; - } const labelCandidate = req.positionals.length >= 3 ? req.positionals[1] : ''; const text = req.positionals.length >= 3 ? req.positionals.slice(2).join(' ') : req.positionals.slice(1).join(' '); if (!text) { return { ok: false, error: { code: 'INVALID_ARGS', message: 'fill requires text after ref' } }; } - let node = findNodeByRef(session.snapshot.nodes, ref); - if (!node?.rect && labelCandidate) { - node = findNodeByLabel(session.snapshot.nodes, labelCandidate); - } - if (!node?.rect) { + const resolvedRefTarget = resolveRefTarget({ + session, + refInput: req.positionals[0], + fallbackLabel: labelCandidate, + requireRect: true, + invalidRefMessage: 'fill requires a ref like @e2', + notFoundMessage: `Ref ${req.positionals[0]} not found or has no bounds`, + }); + if (!resolvedRefTarget.ok) return resolvedRefTarget.response; + const { ref, node, snapshotNodes } = resolvedRefTarget.target; + if (!node.rect) { return { ok: false, error: { code: 'COMMAND_FAILED', message: `Ref ${req.positionals[0]} not found or has no bounds` } }; } const nodeType = node.type ?? ''; @@ -191,7 +198,7 @@ export async function handleInteractionCommands(params: { nodeType && !isFillableType(nodeType, session.device.platform) ? `fill target ${req.positionals[0]} resolved to "${nodeType}", attempting fill anyway.` : undefined; - const refLabel = resolveRefLabel(node, session.snapshot.nodes); + const refLabel = resolveRefLabel(node, snapshotNodes); const selectorChain = buildSelectorChainForNode(node, session.device.platform, { action: 'fill' }); const { x, y } = centerOfRect(node.rect); const data = await dispatch( @@ -311,23 +318,17 @@ export async function handleInteractionCommands(params: { if (refInput.startsWith('@')) { const invalidRefFlagsResponse = refSnapshotFlagGuardResponse('get', req.flags); if (invalidRefFlagsResponse) return invalidRefFlagsResponse; - if (!session.snapshot) { - return { ok: false, error: { code: 'INVALID_ARGS', message: 'No snapshot in session. Run snapshot first.' } }; - } - const ref = normalizeRef(refInput ?? ''); - if (!ref) { - return { ok: false, error: { code: 'INVALID_ARGS', message: 'get text requires a ref like @e2' } }; - } - let node = findNodeByRef(session.snapshot.nodes, ref); - if (!node && req.positionals.length > 2) { - const labelCandidate = req.positionals.slice(2).join(' ').trim(); - if (labelCandidate.length > 0) { - node = findNodeByLabel(session.snapshot.nodes, labelCandidate); - } - } - if (!node) { - return { ok: false, error: { code: 'COMMAND_FAILED', message: `Ref ${refInput} not found` } }; - } + const labelCandidate = req.positionals.length > 2 ? req.positionals.slice(2).join(' ').trim() : ''; + const resolvedRefTarget = resolveRefTarget({ + session, + refInput, + fallbackLabel: labelCandidate, + requireRect: false, + invalidRefMessage: 'get text requires a ref like @e2', + notFoundMessage: `Ref ${refInput} not found`, + }); + if (!resolvedRefTarget.ok) return resolvedRefTarget.response; + const { ref, node } = resolvedRefTarget.target; const selectorChain = buildSelectorChainForNode(node, session.device.platform, { action: 'get' }); if (sub === 'attrs') { sessionStore.recordAction(session, { @@ -548,6 +549,108 @@ export async function handleInteractionCommands(params: { return { ok: true, data: { predicate, pass: true, selector: resolved.selector.raw } }; } + if (command === 'scrollintoview') { + const session = sessionStore.get(sessionName); + if (!session) { + return { + ok: false, + error: { code: 'SESSION_NOT_FOUND', message: 'No active session. Run open first.' }, + }; + } + const targetInput = req.positionals?.[0] ?? ''; + if (!targetInput.startsWith('@')) { + return null; + } + const invalidRefFlagsResponse = refSnapshotFlagGuardResponse('scrollintoview', req.flags); + if (invalidRefFlagsResponse) return invalidRefFlagsResponse; + const fallbackLabel = req.positionals && req.positionals.length > 1 ? req.positionals.slice(1).join(' ').trim() : ''; + const resolvedRefTarget = resolveRefTarget({ + session, + refInput: targetInput, + fallbackLabel, + requireRect: true, + invalidRefMessage: 'scrollintoview requires a ref like @e2', + notFoundMessage: `Ref ${targetInput} not found or has no bounds`, + }); + if (!resolvedRefTarget.ok) return resolvedRefTarget.response; + const { ref, node, snapshotNodes } = resolvedRefTarget.target; + if (!node.rect) { + return { + ok: false, + error: { code: 'COMMAND_FAILED', message: `Ref ${targetInput} not found or has no bounds` }, + }; + } + const viewportRect = resolveViewportRect(snapshotNodes, node.rect); + if (!viewportRect) { + return { + ok: false, + error: { + code: 'COMMAND_FAILED', + message: `scrollintoview could not infer viewport for ${targetInput}`, + }, + }; + } + const plan = buildScrollIntoViewPlan(node.rect, viewportRect); + const refLabel = resolveRefLabel(node, snapshotNodes); + const selectorChain = buildSelectorChainForNode(node, session.device.platform, { action: 'get' }); + if (!plan) { + sessionStore.recordAction(session, { + command, + positionals: req.positionals ?? [], + flags: req.flags ?? {}, + result: { ref, attempts: 0, alreadyVisible: true, strategy: 'ref-geometry', refLabel, selectorChain }, + }); + return { ok: true, data: { ref, attempts: 0, alreadyVisible: true, strategy: 'ref-geometry' } }; + } + const data = await dispatch( + session.device, + 'swipe', + [String(plan.x), String(plan.startY), String(plan.x), String(plan.endY), '60'], + req.flags?.out, + { + ...contextFromFlags(req.flags, session.appBundleId, session.trace?.outPath), + count: plan.count, + pauseMs: 0, + pattern: 'one-way', + }, + ); + const verification = await verifyRefTargetInViewport({ + session, + flags: req.flags, + sessionStore, + contextFromFlags, + dispatch, + selectorChain, + }); + if (!verification.ok) return verification.response; + sessionStore.recordAction(session, { + command, + positionals: req.positionals ?? [], + flags: req.flags ?? {}, + result: { + ...(data ?? {}), + ref, + attempts: plan.count, + direction: plan.direction, + strategy: 'ref-geometry', + verified: true, + refLabel, + selectorChain, + }, + }); + return { + ok: true, + data: { + ...(data ?? {}), + ref, + attempts: plan.count, + direction: plan.direction, + strategy: 'ref-geometry', + verified: true, + }, + }; + } + return null; } @@ -593,7 +696,7 @@ const REF_UNSUPPORTED_FLAG_MAP: ReadonlyArray<[keyof CommandFlags, string]> = [ ]; function refSnapshotFlagGuardResponse( - command: 'press' | 'fill' | 'get', + command: 'press' | 'fill' | 'get' | 'scrollintoview', flags: CommandFlags | undefined, ): DaemonResponse | null { const unsupported = unsupportedRefSnapshotFlags(flags); @@ -623,3 +726,109 @@ export function unsupportedRefSnapshotFlags(flags: CommandFlags | undefined): st } return unsupported; } + +function resolveRefTarget(params: { + session: SessionState; + refInput: string; + fallbackLabel: string; + requireRect: boolean; + invalidRefMessage: string; + notFoundMessage: string; +}): { ok: true; target: { ref: string; node: SnapshotNode; snapshotNodes: SnapshotNode[] } } | { ok: false; response: DaemonResponse } { + const { session, refInput, fallbackLabel, requireRect, invalidRefMessage, notFoundMessage } = params; + if (!session.snapshot) { + return { + ok: false, + response: { ok: false, error: { code: 'INVALID_ARGS', message: 'No snapshot in session. Run snapshot first.' } }, + }; + } + const ref = normalizeRef(refInput); + if (!ref) { + return { + ok: false, + response: { ok: false, error: { code: 'INVALID_ARGS', message: invalidRefMessage } }, + }; + } + let node = findNodeByRef(session.snapshot.nodes, ref); + if ((!node || (requireRect && !node.rect)) && fallbackLabel.length > 0) { + node = findNodeByLabel(session.snapshot.nodes, fallbackLabel); + } + if (!node || (requireRect && !node.rect)) { + return { + ok: false, + response: { ok: false, error: { code: 'COMMAND_FAILED', message: notFoundMessage } }, + }; + } + return { ok: true, target: { ref, node, snapshotNodes: session.snapshot.nodes } }; +} + +async function verifyRefTargetInViewport(params: { + session: SessionState; + flags: CommandFlags | undefined; + sessionStore: SessionStore; + contextFromFlags: ContextFromFlags; + dispatch: typeof dispatchCommand; + selectorChain: string[]; +}): Promise<{ ok: true } | { ok: false; response: DaemonResponse }> { + const { session, flags, sessionStore, contextFromFlags, dispatch, selectorChain } = params; + if (selectorChain.length === 0) { + return { + ok: false, + response: { ok: false, error: { code: 'COMMAND_FAILED', message: 'scrollintoview verification selector is empty' } }, + }; + } + let chainExpression = ''; + try { + chainExpression = selectorChain.join(' || '); + parseSelectorChain(chainExpression); + } catch { + return { + ok: false, + response: { ok: false, error: { code: 'COMMAND_FAILED', message: 'scrollintoview verification selector is invalid' } }, + }; + } + const snapshot = await captureSnapshotForSession( + session, + flags, + sessionStore, + contextFromFlags, + { interactiveOnly: true }, + dispatch, + ); + const chain = parseSelectorChain(chainExpression); + const resolved = resolveSelectorChain(snapshot.nodes, chain, { + platform: session.device.platform, + requireRect: true, + requireUnique: false, + disambiguateAmbiguous: true, + }); + if (!resolved?.node.rect) { + return { + ok: false, + response: { + ok: false, + error: { code: 'COMMAND_FAILED', message: 'scrollintoview target could not be verified after scrolling' }, + }, + }; + } + const viewportRect = resolveViewportRect(snapshot.nodes, resolved.node.rect); + if (!viewportRect) { + return { + ok: false, + response: { + ok: false, + error: { code: 'COMMAND_FAILED', message: 'scrollintoview could not infer viewport during verification' }, + }, + }; + } + if (!isRectWithinSafeViewportBand(resolved.node.rect, viewportRect)) { + return { + ok: false, + response: { + ok: false, + error: { code: 'COMMAND_FAILED', message: 'scrollintoview target is still outside viewport after scrolling' }, + }, + }; + } + return { ok: true }; +} diff --git a/src/daemon/scroll-planner.ts b/src/daemon/scroll-planner.ts new file mode 100644 index 00000000..1ca9fb94 --- /dev/null +++ b/src/daemon/scroll-planner.ts @@ -0,0 +1,113 @@ +import { centerOfRect, type RawSnapshotNode, type Rect } from '../utils/snapshot.ts'; + +export type ScrollIntoViewPlan = { + x: number; + startY: number; + endY: number; + count: number; + direction: 'up' | 'down'; +}; + +export function resolveViewportRect(nodes: RawSnapshotNode[], targetRect: Rect): Rect | null { + const targetCenter = centerOfRect(targetRect); + const rectNodes = nodes.filter((node) => hasValidRect(node.rect)); + const viewportNodes = rectNodes.filter((node) => { + const type = (node.type ?? '').toLowerCase(); + return type.includes('application') || type.includes('window'); + }); + + const containingViewport = pickLargestRect( + viewportNodes + .map((node) => node.rect as Rect) + .filter((rect) => containsPoint(rect, targetCenter.x, targetCenter.y)), + ); + if (containingViewport) return containingViewport; + + const viewportFallback = pickLargestRect(viewportNodes.map((node) => node.rect as Rect)); + if (viewportFallback) return viewportFallback; + + const genericContaining = pickLargestRect( + rectNodes + .map((node) => node.rect as Rect) + .filter((rect) => containsPoint(rect, targetCenter.x, targetCenter.y)), + ); + if (genericContaining) return genericContaining; + + return null; +} + +export function buildScrollIntoViewPlan(targetRect: Rect, viewportRect: Rect): ScrollIntoViewPlan | null { + const viewportHeight = Math.max(1, viewportRect.height); + const viewportTop = viewportRect.y; + const viewportBottom = viewportRect.y + viewportHeight; + const safeTop = viewportTop + viewportHeight * 0.25; + const safeBottom = viewportBottom - viewportHeight * 0.25; + const targetCenterY = targetRect.y + targetRect.height / 2; + + if (targetCenterY >= safeTop && targetCenterY <= safeBottom) { + return null; + } + + const x = Math.round(viewportRect.x + viewportRect.width / 2); + const dragUpStartY = Math.round(viewportTop + viewportHeight * 0.78); + const dragUpEndY = Math.round(viewportTop + viewportHeight * 0.22); + const dragDownStartY = dragUpEndY; + const dragDownEndY = dragUpStartY; + const swipeStepPx = Math.max(1, Math.abs(dragUpStartY - dragUpEndY) * 0.9); + + if (targetCenterY > safeBottom) { + const delta = targetCenterY - safeBottom; + return { + x, + startY: dragUpStartY, + endY: dragUpEndY, + count: clampInt(Math.ceil(delta / swipeStepPx), 1, 50), + direction: 'down', + }; + } + + const delta = safeTop - targetCenterY; + return { + x, + startY: dragDownStartY, + endY: dragDownEndY, + count: clampInt(Math.ceil(delta / swipeStepPx), 1, 50), + direction: 'up', + }; +} + +export function isRectWithinSafeViewportBand(targetRect: Rect, viewportRect: Rect): boolean { + const viewportHeight = Math.max(1, viewportRect.height); + const viewportTop = viewportRect.y; + const viewportBottom = viewportRect.y + viewportHeight; + const safeTop = viewportTop + viewportHeight * 0.25; + const safeBottom = viewportBottom - viewportHeight * 0.25; + const targetCenterY = targetRect.y + targetRect.height / 2; + return targetCenterY >= safeTop && targetCenterY <= safeBottom; +} + +function hasValidRect(rect: Rect | undefined): rect is Rect { + if (!rect) return false; + return Number.isFinite(rect.x) && Number.isFinite(rect.y) && Number.isFinite(rect.width) && Number.isFinite(rect.height); +} + +function containsPoint(rect: Rect, x: number, y: number): boolean { + return x >= rect.x && x <= rect.x + rect.width && y >= rect.y && y <= rect.y + rect.height; +} + +function pickLargestRect(rects: Rect[]): Rect | null { + let best: Rect | null = null; + let bestArea = -1; + for (const rect of rects) { + const area = rect.width * rect.height; + if (area > bestArea) { + best = rect; + bestArea = area; + } + } + return best; +} + +function clampInt(value: number, min: number, max: number): number { + return Math.min(max, Math.max(min, Math.round(value))); +} diff --git a/src/utils/command-schema.ts b/src/utils/command-schema.ts index a1ffde48..6f90f8bb 100644 --- a/src/utils/command-schema.ts +++ b/src/utils/command-schema.ts @@ -483,8 +483,10 @@ export const COMMAND_SCHEMAS: Record = { allowedFlags: [], }, scrollintoview: { - description: 'Scroll until text appears', - positionalArgs: ['text'], + usageOverride: 'scrollintoview ', + description: 'Scroll until text appears or a snapshot ref is brought into view', + positionalArgs: ['target'], + allowsExtraPositionals: true, allowedFlags: [], }, pinch: { diff --git a/website/docs/docs/commands.md b/website/docs/docs/commands.md index b5374e49..1f2bfe62 100644 --- a/website/docs/docs/commands.md +++ b/website/docs/docs/commands.md @@ -56,6 +56,8 @@ agent-device swipe 540 1500 540 500 120 agent-device swipe 540 1500 540 500 120 --count 8 --pause-ms 30 --pattern ping-pong agent-device longpress 300 500 800 agent-device scroll down 0.5 +agent-device scrollintoview "Sign in" +agent-device scrollintoview @e42 agent-device pinch 2.0 # zoom in 2x (iOS simulator) agent-device pinch 0.5 200 400 # zoom out at coordinates (iOS simulator) ``` @@ -64,6 +66,7 @@ agent-device pinch 0.5 200 400 # zoom out at coordinates (iOS simulator) On Android, `fill` also verifies text and performs one clear-and-retry pass on mismatch. `swipe` accepts an optional `durationMs` argument (default `250ms`, range `16..10000`). On iOS, swipe timing uses a safe normalized duration to avoid longpress side effects. +`scrollintoview` accepts plain text or a snapshot ref (`@eN`); ref mode uses geometry-based scrolling. `longpress` is supported on iOS and Android. `pinch` is iOS simulator-only.